dragonFirst, we create a dataset containing element network information joined with other attributes of elements. We specifically only consider elements that are known to form at least five minerals on Earth since 4.33 Ga. The following elements are therefore excluded (in parentheses is given the number of minerals it forms): Dy (1), Er (1), Gd (1), Hf (1), Sm (1), Re (2), Rb (3).
# Prepare if file not present. Otherwise, read in the file.
if (file.exists(data_file)) {
element_networks_info <- read_csv(data_file)
} else {
mineral_threshold <- 5
dragon:::element_info %>%
# Remove those not present **since** 4.33 Ga
filter(element != "Tc") -> elements
purrr::map_df(elements$element, parse_element_network) %>%
left_join(elements) %>%
mutate(n_elements = as.numeric(n_elements),
n_minerals = as.numeric(n_minerals),
n_localities = as.numeric(n_localities)) %>%
filter(n_minerals >= mineral_threshold) -> element_networks_info
# Save to file
write_csv(element_networks_info, data_file)
}Here is the full dataset used for analysis:
datatable(element_networks_info)
5 First, we explore whether we should likely transform an axis on a log-scale. Model diagnostics are shown below for the model with log y and untransformed x, whose diagnostics best meet assumptions.
# regression
# No
#lm(n_minerals ~ n_elements, data = element_networks_info) -> model_plain
#performance::check_model(model_plain)
# Yes
lm(log(n_minerals) ~ n_elements, data = element_networks_info) -> model_ylog
performance::check_model(model_ylog)# No
#lm(n_minerals ~ log(n_elements), data = element_networks_info) -> model_xlog
#performance::check_model(model_xlog)
# No
#lm(log(n_minerals) ~ log(n_elements), data = element_networks_info) -> model_xlog_ylog
#performance::check_model(model_xlog_ylog)The resulting model is as follows - THERE IS A POSITIVE RELATIONSHIP.
get_model_info(model_ylog, "n_elements") -> model_info
make_plot(
element_networks_info %>% mutate(n_minerals_log = log(n_minerals)),
n_elements,
n_minerals_log,
model_info,
"Number of Elements",
"Log Number of Minerals",
c("C", "H", "O", "N", "P", "S"),
4 #seed
) -> model1_plot
model1_plot
First, we explore whether we should likely transform an axis on a log-scale. Model diagnostics are shown below for the model with log y and untransformed x, whose diagnostics best meet assumptions.
# regression
# No
#lm(n_localities ~ n_elements, data = element_networks_info) -> model_plain
#performance::check_model(model_plain)
# Yes
lm(log(n_localities) ~ n_elements, data = element_networks_info) -> model_ylog
performance::check_model(model_ylog)# No
#lm(n_localities ~ log(n_elements), data = element_networks_info) -> model_xlog
#performance::check_model(model_xlog)
# No
#lm(log(n_localities) ~ log(n_elements), data = element_networks_info) -> model_xlog_ylog
#performance::check_model(model_xlog_ylog)The resulting model is as follows - THERE IS A POSITIVE RELATIONSHIP.
get_model_info(model_ylog, "n_elements") -> model_info
make_plot(
element_networks_info %>% mutate(n_localities_log = log(n_localities)),
n_elements,
n_localities_log,
model_info,
"Number of Elements",
"Log Number of Localities",
c("C", "H", "O", "N", "Au", "P"),
10
) -> model2_plot
# S and P have to be labeled separately
model2_plot +
geom_text(
aes(label = ifelse(element == "S", "S", "")),
nudge_y = 0.2,
nudge_x = -1,
fontface = "bold",
color = label.color,
size = label.size
) model2_plotNote that there are six elements which do not appear in this analysis because they are missing crust data - C, H, N, REE (rare earth elements), Rh, Te.
First, we explore whether we should likely transform an axis on a log-scale. Model diagnostics are shown below for the model with log x and untransformed y, whose diagnostics best meet assumptions.
# regression
# No
#lm(n_elements ~ element_crust_percent_weight, data = element_networks_info) -> model_plain
#performance::check_model(model_plain)
# No
#lm(log(n_elements) ~ element_crust_percent_weight, data = element_networks_info) -> model_ylog
performance::check_model(model_ylog)# Yes
lm(n_elements ~ log(element_crust_percent_weight), data = element_networks_info) -> model_xlog
performance::check_model(model_xlog)# No
#lm(log(n_elements) ~ log(element_crust_percent_weight), data = element_networks_info) -> model_xlog_ylog
#performance::check_model(model_xlog_ylog)The resulting model is as follows - THERE IS A POSITIVE RELATIONSHIP.
# Since we have a logged x, have to remake model:
element_networks_info %>%
mutate(element_crust_percent_weight_log = log(element_crust_percent_weight)) -> forfit
lm(n_elements ~ element_crust_percent_weight_log, data = forfit) -> model_xlog
get_model_info(model_xlog, "element_crust_percent_weight_log") -> model_info
make_plot(
element_networks_info %>%
mutate(element_crust_percent_weight_log = log(element_crust_percent_weight)),
element_crust_percent_weight_log,
n_elements,
model_info,
"Log crust percentage",
"Number of Elements",
c("As", "Pb", "Cu", "S", "O", "Fe", "Mn", "Zn", "P", "Mo", "Co", "Br", "Sc", "Yb", "Ga"),
11
) -> model3_plot
# P and Ni separately, except the bottom P just allows the first P....
model3_plot +
# This is an insane hack for P and I DO NOT REGRET IT!!
geom_text(
aes(label = ifelse(element == "P", "P", "")),
nudge_y = 2,
nudge_x = 0,
fontface = "bold",
color = "white",
size = label.size
) +
geom_text(
aes(label = ifelse(element == "Ni", "Ni", "")),
nudge_y = -2.5,
nudge_x = -0.4,
fontface = "bold",
color = label.color,
size = label.size
)model3_plotFirst, we explore whether we should likely transform an axis on a log-scale. Model diagnostics are shown below for the model with both untransformed y and x, whose diagnostics best meet assumptions.
# Yes
lm(pauling ~ n_elements, data = element_networks_info) -> model_plain
performance::check_model(model_plain)# No
#lm(log(pauling) ~ n_elements, data = element_networks_info) -> model_ylog
#performance::check_model(model_ylog)
# No
#lm(pauling ~ log(n_elements), data = element_networks_info) -> model_xlog
#performance::check_model(model_xlog)
# No
#lm(log(pauling) ~ log(n_elements), data = element_networks_info) -> model_xlog_ylog
#performance::check_model(model_xlog_ylog)The resulting model is as follows - THERE IS NO RELATIONSHIP.:
get_model_info(model_plain, "n_elements") -> model_info
make_plot(
element_networks_info,
n_elements,
pauling,
model_info,
"Number of Elements",
"Pauling electronegativity"
) -> model4_plot
model4_plotFirst, we explore whether we should likely transform an axis on a log-scale. Model diagnostics are shown below for the model with log y and untransformed x, whose diagnostics best meet assumptions.
# No
#lm(n_minerals ~ pauling, data = element_networks_info) -> model_plain
#performance::check_model(model_plain)
# Yes
lm(log(n_minerals) ~ pauling, data = element_networks_info) -> model_ylog
performance::check_model(model_ylog)# No
#lm(n_minerals ~ log(pauling), data = element_networks_info) -> model_xlog
#performance::check_model(model_xlog)
# No but close
#lm(log(pauling) ~ log(n_minerals), data = element_networks_info) -> model_xlog_ylog
#performance::check_model(model_xlog_ylog)The resulting model is as follows - THERE IS NO RELATIONSHIP.
get_model_info(model_ylog, "pauling") -> model_info
make_plot(
element_networks_info %>% mutate(n_minerals_log = log(n_minerals)),
pauling,
n_minerals_log,
model_info,
"Pauling electronegativity",
"Log number of minerals formed"
) -> model5_plot
model5_plotFirst, we explore whether we should likely transform an axis on a log-scale. Model diagnostics are shown below for the model with both axes untransformed; all diagnostics are about the same, so we’ll use the regular data.
# Yes
lm(n_elements ~ number_of_protons, data = element_networks_info) -> model_plain
performance::check_model(model_plain)# No
#lm(log(n_elements) ~ number_of_protons, data = element_networks_info) -> model_ylog
#performance::check_model(model_ylog)
# No
#lm(n_elements ~ log(number_of_protons), data = element_networks_info) -> model_xlog
#performance::check_model(model_xlog)
# No
#lm(log(n_elements) ~ log(number_of_protons), data = element_networks_info) -> model_xlog_ylog
#performance::check_model(model_xlog_ylog)The resulting model is as follows - THERE IS A NEGATIVE RELATIONSHIP.
get_model_info(model_plain, "number_of_protons") -> model_info
make_plot(
element_networks_info,
n_elements,
number_of_protons,
model_info,
"Number of Elements",
"Atomic number", # variable is number_of_protons which is equivalent
c("As", "Pb", "Cu", "S", "O", "Fe", "Mn", "P", "Zn", "Ni", "Co", "Br", "Sc", "Yb", "Ga", "C", "H", "N", "Bi", "U")
) -> model6_plot
# Mo is specifically annoying to label because it's ON the regression line
model6_plot +
geom_text(
aes(label = ifelse(element == "Mo", "Mo", "")),
nudge_y = 3,
fontface = "bold",
color = label.color,
size = label.size) -> model6_plot
model6_plotHere we export figures to PDF files for use in manuscript.
Figure 1 is two panels comprised of model1_plot and model2_plot.
fig1_grid <- plot_grid(model1_plot, model2_plot, labels = "AUTO", nrow = 2)
save_plot(ms_fig1_file, fig1_grid, base_height = 10, base_width = 7)Figure 2 is two panels comprised of model3_plot and model6_plot.
fig2_grid <- plot_grid(model3_plot, model6_plot, labels = "AUTO", nrow = 2)
save_plot(ms_fig2_file, fig2_grid, base_height = 10, base_width = 7)The following shows the R version and package versions loaded for this analysis to enable reproducibility.
sessionInfo()## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] DT_0.20 cowplot_1.1.1 ggrepel_0.9.1 ggtext_0.1.1
## [5] performance_0.8.0 broom_0.7.12 dragon_1.2.0 forcats_0.5.1
## [9] stringr_1.4.0 dplyr_1.0.8 purrr_0.3.4 readr_2.1.2
## [13] tidyr_1.2.0 tibble_3.1.6 ggplot2_3.3.5 tidyverse_1.3.1
##
## loaded via a namespace (and not attached):
## [1] colorspace_2.0-3 ellipsis_0.3.2 rprojroot_2.0.2
## [4] markdown_1.1 fs_1.5.2 gridtext_0.1.4
## [7] rstudioapi_0.13 farver_2.1.0 roxygen2_7.1.2
## [10] remotes_2.4.2 bit64_4.0.5 golem_0.3.1
## [13] fansi_1.0.2 lubridate_1.8.0 xml2_1.3.3
## [16] splines_4.1.2 knitr_1.37 config_0.3.1
## [19] pkgload_1.2.4 jsonlite_1.8.0 dbplyr_2.1.1
## [22] shinydashboard_0.7.2 shiny_1.7.1 compiler_4.1.2
## [25] httr_1.4.2 backports_1.4.1 Matrix_1.4-0
## [28] assertthat_0.2.1 fastmap_1.1.0 cli_3.2.0
## [31] later_1.3.0 htmltools_0.5.2 prettyunits_1.1.1
## [34] tools_4.1.2 gtable_0.3.0 glue_1.6.2
## [37] Rcpp_1.0.8 cellranger_1.1.0 jquerylib_0.1.4
## [40] vctrs_0.3.8 nlme_3.1-155 crosstalk_1.2.0
## [43] insight_0.16.0 xfun_0.29 ps_1.6.0
## [46] brio_1.1.3 testthat_3.1.2 rvest_1.0.2
## [49] mime_0.12 lifecycle_1.0.1 scales_1.1.1
## [52] vroom_1.5.7 ragg_1.2.2 hms_1.1.1
## [55] promises_1.2.0.1 parallel_4.1.2 yaml_2.3.5
## [58] see_0.6.9 sass_0.4.0 stringi_1.7.6
## [61] highr_0.9 bayestestR_0.11.5 desc_1.4.0
## [64] pkgbuild_1.3.1 attempt_0.3.1 systemfonts_1.0.4
## [67] rlang_1.0.1 pkgconfig_2.0.3 lattice_0.20-45
## [70] evaluate_0.15 labeling_0.4.2 patchwork_1.1.1
## [73] htmlwidgets_1.5.4 bit_4.0.4 processx_3.5.2
## [76] tidyselect_1.1.2 magrittr_2.0.2 R6_2.5.1
## [79] generics_0.1.2 DBI_1.1.2 pillar_1.7.0
## [82] haven_2.4.3 withr_2.4.3 mgcv_1.8-39
## [85] datawizard_0.2.3 modelr_0.1.8 crayon_1.5.0
## [88] shinyWidgets_0.6.4 utf8_1.2.2 tzdb_0.2.0
## [91] rmarkdown_2.11 usethis_2.1.5 grid_4.1.2
## [94] readxl_1.3.1 callr_3.7.0 reprex_2.0.1
## [97] digest_0.6.29 xtable_1.8-4 httpuv_1.6.5
## [100] textshaping_0.3.6 munsell_0.5.0 dockerfiler_0.1.4
## [103] bslib_0.3.1